Practical Lab 10

Vanilla CNN and Fine-Tune VGG16 - for Dogs and Cats Classification

Dogs vs. Cats Dataset

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.image import load_img
In [ ]:
# Load the data and preprocess the data
train_dir = r'C:\Users\Abdul\CSCN8010\data\kaggle_dogs_vs_cats_small\train'
test_dir = r'C:\Users\Abdul\CSCN8010\data\kaggle_dogs_vs_cats_small\test'
# Data preprocessing
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary')
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary')
Found 2000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.

EDA

In [ ]:
# class names
class_names = sorted(os.listdir(train_dir))
# Data Distribution
train_class_counts = {class_name: len(os.listdir(os.path.join(train_dir, class_name))) for class_name in class_names}
test_class_counts = {class_name: len(os.listdir(os.path.join(test_dir, class_name))) for class_name in class_names}
print("Training Data Class Counts:")
print(train_class_counts)
print("\nTesting Data Class Counts:")
print(test_class_counts)
# Data Visualization 
def plot_class_distribution(train_counts, test_counts):
    labels = list(train_counts.keys())
    train_values = list(train_counts.values())
    test_values = list(test_counts.values())

    x = np.arange(len(labels))
    width = 0.35

    fig, ax = plt.subplots()
    rects1 = ax.bar(x - width/2, train_values, width, label='Train')
    rects2 = ax.bar(x + width/2, test_values, width, label='Test')

    ax.set_ylabel('Count')
    ax.set_title('Class Distribution')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    plt.show()
plot_class_distribution(train_class_counts, test_class_counts)
# Data Visualization 
def plot_sample_images(directory, class_names, num_samples=5):
    plt.figure(figsize=(10, 5*num_samples))
    for i, class_name in enumerate(class_names):
        class_dir = os.path.join(directory, class_name)
        sample_images = os.listdir(class_dir)[:num_samples]
        for j, image_name in enumerate(sample_images):
            image_path = os.path.join(class_dir, image_name)
            image = load_img(image_path, target_size=(224, 224))
            plt.subplot(num_samples, len(class_names), i * num_samples + j + 1)
            plt.imshow(image)
            plt.title(class_name)
            plt.axis("off")
    plt.show()
print("\nSample Training Images:")
plot_sample_images(train_dir, class_names)
print("\nSample Testing Images:")
plot_sample_images(test_dir, class_names)
Training Data Class Counts:
{'cat': 1000, 'dog': 1000}

Testing Data Class Counts:
{'cat': 1000, 'dog': 1000}
Sample Training Images:
Sample Testing Images:

Neural Network

In [ ]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
def create_custom_model(input_shape):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model
custom_model = create_custom_model(input_shape=(224, 224, 3))

Fine-Tune VGG16 (pre-trained on ImageNet)

In [ ]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False
x = Flatten()(base_model.output)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)
fine_tuned_model = Model(inputs=base_model.input, outputs=predictions)
fine_tuned_model.compile(optimizer='adam',
                         loss='binary_crossentropy',
                         metrics=['accuracy'])
checkpoint = ModelCheckpoint(filepath='best_model_vgg16.h5',
                             monitor='val_accuracy',
                             save_best_only=True,
                             verbose=1)
callbacks = [checkpoint]
history = fine_tuned_model.fit(train_generator,
                                epochs=10,
                                validation_data=test_generator,
                                callbacks=callbacks)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
58889256/58889256 [==============================] - 18s 0us/step
Epoch 1/10
63/63 [==============================] - ETA: 0s - loss: 0.6750 - accuracy: 0.7685
Epoch 1: val_accuracy improved from -inf to 0.89100, saving model to best_model_vgg16.h5
63/63 [==============================] - 429s 7s/step - loss: 0.6750 - accuracy: 0.7685 - val_loss: 0.2639 - val_accuracy: 0.8910
Epoch 2/10
63/63 [==============================] - ETA: 0s - loss: 0.1912 - accuracy: 0.9335
Epoch 2: val_accuracy improved from 0.89100 to 0.90450, saving model to best_model_vgg16.h5
63/63 [==============================] - 471s 8s/step - loss: 0.1912 - accuracy: 0.9335 - val_loss: 0.2252 - val_accuracy: 0.9045
Epoch 3/10
63/63 [==============================] - ETA: 0s - loss: 0.1271 - accuracy: 0.9615
Epoch 3: val_accuracy improved from 0.90450 to 0.91350, saving model to best_model_vgg16.h5
63/63 [==============================] - 650s 10s/step - loss: 0.1271 - accuracy: 0.9615 - val_loss: 0.2091 - val_accuracy: 0.9135
Epoch 4/10
63/63 [==============================] - ETA: 0s - loss: 0.0747 - accuracy: 0.9845
Epoch 4: val_accuracy did not improve from 0.91350
63/63 [==============================] - 697s 11s/step - loss: 0.0747 - accuracy: 0.9845 - val_loss: 0.2051 - val_accuracy: 0.9110
Epoch 5/10
63/63 [==============================] - ETA: 0s - loss: 0.0629 - accuracy: 0.9835
Epoch 5: val_accuracy did not improve from 0.91350
63/63 [==============================] - 485s 8s/step - loss: 0.0629 - accuracy: 0.9835 - val_loss: 0.2233 - val_accuracy: 0.9050
Epoch 6/10
63/63 [==============================] - ETA: 0s - loss: 0.0366 - accuracy: 0.9960
Epoch 6: val_accuracy did not improve from 0.91350
63/63 [==============================] - 452s 7s/step - loss: 0.0366 - accuracy: 0.9960 - val_loss: 0.2108 - val_accuracy: 0.9120
Epoch 7/10
63/63 [==============================] - ETA: 0s - loss: 0.0212 - accuracy: 0.9985
Epoch 7: val_accuracy did not improve from 0.91350
63/63 [==============================] - 508s 8s/step - loss: 0.0212 - accuracy: 0.9985 - val_loss: 0.2191 - val_accuracy: 0.9125
Epoch 8/10
63/63 [==============================] - ETA: 0s - loss: 0.0142 - accuracy: 1.0000
Epoch 8: val_accuracy did not improve from 0.91350
63/63 [==============================] - 515s 8s/step - loss: 0.0142 - accuracy: 1.0000 - val_loss: 0.2240 - val_accuracy: 0.9095
Epoch 9/10
63/63 [==============================] - ETA: 0s - loss: 0.0106 - accuracy: 1.0000
Epoch 9: val_accuracy did not improve from 0.91350
63/63 [==============================] - 521s 8s/step - loss: 0.0106 - accuracy: 1.0000 - val_loss: 0.2241 - val_accuracy: 0.9110
Epoch 10/10
63/63 [==============================] - ETA: 0s - loss: 0.0087 - accuracy: 1.0000
Epoch 10: val_accuracy did not improve from 0.91350
63/63 [==============================] - 523s 8s/step - loss: 0.0087 - accuracy: 1.0000 - val_loss: 0.2293 - val_accuracy: 0.9115
In [ ]:
# callback to save the best model 
checkpoint_custom = ModelCheckpoint(filepath='best_model_custom.h5',
                                    monitor='val_accuracy',
                                    save_best_only=True,
                                    verbose=1)
# Train the model with the callback
history_custom = custom_model.fit(train_generator,
                                  epochs=10,
                                  validation_data=test_generator,
                                  callbacks=[checkpoint_custom])
Epoch 1/10
63/63 [==============================] - ETA: 0s - loss: 0.7665 - accuracy: 0.5095
Epoch 1: val_accuracy improved from -inf to 0.51150, saving model to best_model_custom.h5
63/63 [==============================] - 68s 1s/step - loss: 0.7665 - accuracy: 0.5095 - val_loss: 0.6896 - val_accuracy: 0.5115
Epoch 2/10
63/63 [==============================] - ETA: 0s - loss: 0.6620 - accuracy: 0.5950
Epoch 2: val_accuracy improved from 0.51150 to 0.64600, saving model to best_model_custom.h5
63/63 [==============================] - 69s 1s/step - loss: 0.6620 - accuracy: 0.5950 - val_loss: 0.6264 - val_accuracy: 0.6460
Epoch 3/10
63/63 [==============================] - ETA: 0s - loss: 0.6041 - accuracy: 0.6710
Epoch 3: val_accuracy improved from 0.64600 to 0.69650, saving model to best_model_custom.h5
63/63 [==============================] - 76s 1s/step - loss: 0.6041 - accuracy: 0.6710 - val_loss: 0.5996 - val_accuracy: 0.6965
Epoch 4/10
63/63 [==============================] - ETA: 0s - loss: 0.4857 - accuracy: 0.7655
Epoch 4: val_accuracy improved from 0.69650 to 0.72150, saving model to best_model_custom.h5
63/63 [==============================] - 72s 1s/step - loss: 0.4857 - accuracy: 0.7655 - val_loss: 0.5735 - val_accuracy: 0.7215
Epoch 5/10
63/63 [==============================] - ETA: 0s - loss: 0.3982 - accuracy: 0.8195
Epoch 5: val_accuracy did not improve from 0.72150
63/63 [==============================] - 69s 1s/step - loss: 0.3982 - accuracy: 0.8195 - val_loss: 0.5616 - val_accuracy: 0.7135
Epoch 6/10
63/63 [==============================] - ETA: 0s - loss: 0.2935 - accuracy: 0.8740
Epoch 6: val_accuracy did not improve from 0.72150
63/63 [==============================] - 71s 1s/step - loss: 0.2935 - accuracy: 0.8740 - val_loss: 0.6717 - val_accuracy: 0.7150
Epoch 7/10
63/63 [==============================] - ETA: 0s - loss: 0.1736 - accuracy: 0.9265
Epoch 7: val_accuracy did not improve from 0.72150
63/63 [==============================] - 76s 1s/step - loss: 0.1736 - accuracy: 0.9265 - val_loss: 0.7933 - val_accuracy: 0.6990
Epoch 8/10
63/63 [==============================] - ETA: 0s - loss: 0.1063 - accuracy: 0.9645
Epoch 8: val_accuracy did not improve from 0.72150
63/63 [==============================] - 75s 1s/step - loss: 0.1063 - accuracy: 0.9645 - val_loss: 0.9146 - val_accuracy: 0.6960
Epoch 9/10
63/63 [==============================] - ETA: 0s - loss: 0.0452 - accuracy: 0.9880
Epoch 9: val_accuracy did not improve from 0.72150
63/63 [==============================] - 74s 1s/step - loss: 0.0452 - accuracy: 0.9880 - val_loss: 1.3874 - val_accuracy: 0.7005
Epoch 10/10
63/63 [==============================] - ETA: 0s - loss: 0.0222 - accuracy: 0.9915
Epoch 10: val_accuracy did not improve from 0.72150
63/63 [==============================] - 74s 1s/step - loss: 0.0222 - accuracy: 0.9915 - val_loss: 1.3555 - val_accuracy: 0.6995

Relative performance of the models

In [ ]:
from sklearn.metrics import precision_recall_curve
custom_model_best = load_model('best_model_custom.h5')
fine_tuned_model_best = load_model('best_model_vgg16.h5')
custom_model_predictions = custom_model_best.predict(test_generator).flatten()
custom_model_accuracy = accuracy_score(test_generator.classes, custom_model_predictions.round())
custom_model_confusion_matrix = confusion_matrix(test_generator.classes, custom_model_predictions.round())
custom_model_classification_report = classification_report(test_generator.classes, custom_model_predictions.round())
fine_tuned_model_predictions = fine_tuned_model_best.predict(test_generator).flatten()
fine_tuned_model_accuracy = accuracy_score(test_generator.classes, fine_tuned_model_predictions.round())
fine_tuned_model_confusion_matrix = confusion_matrix(test_generator.classes, fine_tuned_model_predictions.round())
fine_tuned_model_classification_report = classification_report(test_generator.classes, fine_tuned_model_predictions.round())
precision_custom, recall_custom, _ = precision_recall_curve(test_generator.classes, custom_model_predictions)
precision_vgg16, recall_vgg16, _ = precision_recall_curve(test_generator.classes, fine_tuned_model_predictions)
plt.plot(recall_custom, precision_custom, label='Custom Model')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve (Custom Model)')
plt.legend()
plt.show()
print("Custom Model:")
print("Accuracy:", custom_model_accuracy)
print("Confusion Matrix:\n", custom_model_confusion_matrix)
print("Classification Report:\n", custom_model_classification_report)
print("\nFine-Tuned VGG16 Model:")
print("Accuracy:", fine_tuned_model_accuracy)
print("Confusion Matrix:\n", fine_tuned_model_confusion_matrix)
print("Classification Report:\n", fine_tuned_model_classification_report)
63/63 [==============================] - 12s 183ms/step
63/63 [==============================] - 252s 4s/step
Custom Model:
Accuracy: 0.5125
Confusion Matrix:
 [[493 507]
 [468 532]]
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.49      0.50      1000
           1       0.51      0.53      0.52      1000

    accuracy                           0.51      2000
   macro avg       0.51      0.51      0.51      2000
weighted avg       0.51      0.51      0.51      2000


Fine-Tuned VGG16 Model:
Accuracy: 0.5045
Confusion Matrix:
 [[491 509]
 [482 518]]
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.49      0.50      1000
           1       0.50      0.52      0.51      1000

    accuracy                           0.50      2000
   macro avg       0.50      0.50      0.50      2000
weighted avg       0.50      0.50      0.50      2000

Conclusions:

  • Accomplished nice precision on the Cat vs Dogs classification task, however it was beated by the fine-tuned VGG16 model
  • Fine-tuning VGG16 on the Cats vs Dogs dataset altogether developed execution contrasted with the custom model.
  • Both models show good precision and recall,showing a fair presentation as far as distinguishing the two cats and dogs accurately.
  • The precision-recall curve shows the compromise among accuracy and review for the custom model.